## This script takes results from the base scenario in step 5 and produces charts and values used in the report

# Prelims -----------------------------------------------------------------

rm(list=ls())
gc()

memory.limit(120000)

## ensure packages are loaded
source("./R scripts/Master package loading.R")

## source extra data and functions for analysing results
source("./R scripts/Simulation model/Cohort simulation results - functions prep.R")

## version of results to read in, by date
run_date <- "2021-10-26" 


# Read in results data ------------------------------------------------------------

sim_results_w_transfers <- qread(paste0("./OUtput data/sim_results_returns_converge_", run_date, ".qs")) 
sim_results_no_transfers <- qread(paste0("./OUtput data/sim_results_returns_converge_no_transfer_", run_date, ".qs"))

sim_results_historic_housing <- qread(paste0("./Output data/sim_results_historic_housing_", run_date, ".qs"))

sim_results_housing_cf <- qread(paste0("./Output data/sim_results_housing_cf_", run_date, ".qs")) ## housing counterfactual (1 ppt higher return)

sim_results_bequests_only <- qread(paste0("./Output data/sim_results_bequests_only_", run_date, ".qs"))
sim_results_gifts_only <- qread(paste0("./Output data/sim_results_gifts_only_", run_date, ".qs"))


# Pulling out relevant results -------------------------------------

sim_results_w_transfers_tidy <- sim_results_w_transfers %>% 
  tidy_results_fn %>% 
  inc_wealth_group_fn

sim_results_no_transfers_tidy <-  sim_results_no_transfers %>% 
  tidy_results_fn

sim_results_historic_housing_tidy <- sim_results_historic_housing %>%
  tidy_results_fn

sim_results_housing_cf_tidy <- sim_results_housing_cf %>%
  tidy_results_fn

sim_results_bequests_only_tidy <- sim_results_bequests_only %>% 
  tidy_results_fn

sim_results_gifts_only_tidy <- sim_results_gifts_only %>% 
  tidy_results_fn


# Summarised dataframes by group -------------------------------------------------------

## by year
sim_results_year <- sim_results_w_transfers_tidy %>% 
  group_by(year) %>% 
  summarise_vars_fn 

## by year and age
sim_results_year_age <- sim_results_w_transfers_tidy %>% 
  group_by(year, age_grp) %>% 
  summarise_vars_fn

## by year and age -- for no transfers
sim_results_year_age_no_transfers <- sim_results_no_transfers_tidy %>% 
  group_by(year, age_grp) %>% 
  summarise_vars_fn


## by year and age -- for historic housing and housing counterfactual 
sim_results_historic_housing_year_age <- sim_results_historic_housing_tidy %>%
  group_by(year, age_grp) %>%
  summarise_vars_fn

sim_results_housing_cf_year_age <- sim_results_housing_cf_tidy %>%
  group_by(year, age_grp) %>%
  summarise_vars_fn


## by year and initial age grp 0
sim_results_year_age0 <- sim_results_w_transfers_tidy %>% 
  group_by(year, age_grp_0) %>% 
  summarise_vars_fn

## by year and gen 0
sim_results_year_gen0 <- sim_results_w_transfers_tidy %>% 
  group_by(year, gen0) %>% 
  summarise_vars_fn


## by year and age and inc
sim_results_year_age_inc <- sim_results_w_transfers_tidy %>% 
  group_by(year, age_grp, total_inc_qtile) %>% 
  summarise_vars_fn

## by year and age and beqrec
sim_results_year_age_beqrec <- sim_results_w_transfers_tidy %>% 
  group_by(year, age_grp, beqrec) %>% 
  summarise_vars_fn

## by year and age and ho
sim_results_year_age_ho <- sim_results_w_transfers_tidy %>% 
  group_by(year, age_grp, ho) %>% 
  summarise_vars_fn

## by year and age and wealth-age grp
sim_results_year_age_wlth <- sim_results_w_transfers_tidy %>% 
  group_by(year, age_grp, age_wlth_grp3) %>% 
  summarise_vars_fn


## Combined w and wo transfers, age 30-99, limited years
sim_results_box <- sim_results_w_transfers_tidy %>% 
  select(year, starting_cohort, age_grp, age_grp_0, gen0, total_inc_qtile, inc_pathway, beqrec, ho, n, total_wealth_w_transfers_real = total_wealth_real) %>% 
  left_join(sim_results_no_transfers_tidy %>% select(year, starting_cohort, age_grp, age_grp_0, gen0, inc_pathway, beqrec, ho, total_wealth_no_transfers_real = total_wealth_real)) %>% 
  filter(year %in% c(2018, 2028, 2038, 2048, 2020, 2030, 2040, 2050)) %>% 
  ## filter age groups because no births mean no younger ages at later years. 100-105 2020 sensitive 
  filter(age_grp>="[30,35)" & age_grp<="[95,100)") %>% 
  select(year, starting_cohort, age_grp, age_grp_0, gen0, total_inc_qtile, inc_pathway, beqrec, ho, n, total_wealth_w_transfers_real, total_wealth_no_transfers_real) %>% 
  pivot_longer(cols=c(total_wealth_w_transfers_real, total_wealth_no_transfers_real), names_to = "wealth_type", values_to="value") 


## sim results year age for bequests/gifts only
sim_results_year_age_bequests <- sim_results_bequests_only_tidy %>% 
  group_by(year, age_grp) %>% 
  summarise_vars_fn

sim_results_year_age_gifts <- sim_results_gifts_only_tidy %>% 
  group_by(year, age_grp) %>% 
  summarise_vars_fn



# CHAPTER CHARTS AND VALUES -----------------------------------------------


# Wealth distribution by age ----------------------------------------

##  what percentage of wealth does the age group hold...
intergen_wealth_dist <- sim_results_box %>% 
  filter(year %in% c(seq(2028, 2048, 10))) %>% 
  group_by(year, wealth_type) %>% 
  mutate(total_wealth_year = sum(value*n ),
         n_year = sum(n)) %>% 
  group_by(year, wealth_type, age_grp) %>% 
  summarise(
    age_pc = sum(value*n)/mean(total_wealth_year),
    n_pc = sum(n)/mean(n_year))

## get HILDA equivalents - percentage wealth held, excluding non-housing debt
hilda_intergen_wealth_dist <- hilda_grouped %>% 
  filter(wavenumber %in% c(2, 6, 10, 14,  18)) %>% 
  filter(age_grp>="[30,35)" & age_grp<="[95,100)") %>% 
  mutate(total_wealth = housing_assets + super_assets + other_assets - housing_debt) %>% 
  ## calc percentage of wealth held by each age group
  group_by(wavenumber, age_grp) %>% 
  summarise(agg_total_wealth = sum(total_wealth*hhwte),
            agg_housing_wealth = sum(housing_assets*hhwte) - sum(housing_debt*hhwte),
            n = sum(hhwte)) %>% 
  group_by(wavenumber) %>% 
  mutate(age_pc = agg_total_wealth/sum(agg_total_wealth),
         n_pc = n/sum(n),
         year=wavenumber+2000) %>% 
  ungroup %>% 
  select(year, age_grp, age_pc, n_pc)


## *** chart in chapter (figure 3.3)
## ## Percentage of projected 30–99 year old population (column) and wealth (line)
intergen_wealth_dist_line <- intergen_wealth_dist %>% 
  filter(wealth_type=="total_wealth_w_transfers_real" & year>2018) %>% 
  ungroup %>% 
  select(-wealth_type) %>% 
  rbind(hilda_intergen_wealth_dist %>% filter(year %in% c(2002,2010, 2018)) ) %>%  ## NA age_pc - plotted separately by linetype
  mutate(source = ifelse(year<=2018, "Historic", "Projected")) %>% 
  ggplot +
  geom_col(aes(x = as.numeric(age_grp), y=n_pc*100, fill = as.factor(year) ), position="dodge", alpha=0.3) +
  geom_line(aes(x = as.numeric(age_grp), y=age_pc*100, colour=as.factor(year), linetype=as.factor(source))) +
  scale_colour_pc() +
  scale_fill_pc() +
  scale_linetype_manual(values= c("solid", "twodash")) +
  scale_x_continuous(labels = unique(intergen_wealth_dist$age_grp)[seq(1, 14, 2)] %>% age_grp_labeller,
                     breaks = seq(7, 20, 2)) + ## be careful with labelling here - double check carefully
  ylab("Per cent") +
  xlab("Age group") +
  scale_y_continuous(expand=c(0,0)) +
  coord_cartesian(ylim = c(0,15)) +
  guides(colour=guide_legend(nrow=1,byrow=TRUE),
         linetype=guide_legend(order=2, override.aes=list(colour="black"))) +
  custom_plot_margin

emf(file=paste0("./Charts/intergen_wealth_dist_", Sys.Date(), ".emf"),
    width = 15/2.54, height = 8/2.54,
    pointsize=12,
    family="Arial")
intergen_wealth_dist_line
dev.off()

ggsave(file=paste0("./Charts/intergen_wealth_dist_", Sys.Date(), ".svg"), 
                   plot=intergen_wealth_dist_line, 
                   width = 15/2.54, height = 8/2.54)


## *** USED IN CHAPTER
# The base scenario projection suggests that among people aged 30–99, those aged 60 and over accounted for about a third of the population in 2018, 
# rising by about 12 percentage points to 46 per cent in 2048. 
# However, their share of wealth increases by about 22 percentage points to 67 per cent over the same period. 
## Housing wealth represented about half of total wealth for those aged 30–59 in both 2018 and 2048. 
## For people aged 60–99, housing wealth represented about 54 per cent in 2018 and increased to 66 per cent by 2048
wealth_older <- sim_results_year_age %>% 
  filter(age_grp>="[30,35)" & age_grp<="[95,100)") %>% 
  mutate(older = ifelse(age_grp>="[60,65)", 1, 0)) %>% 
  group_by(year) %>% 
  mutate(total_wealth_tot_real_year = sum(total_wealth_tot_real),
         n_tot_year = sum(n_tot)) %>% 
  group_by(year, older) %>% 
  summarise( ## what proportion of total wealth and population do they account for
    wealth_age_pc = sum(total_wealth_tot_real)/mean(total_wealth_tot_real_year),
    n_age_pc = sum(n_tot)/mean(n_tot_year),
    ## percentage of own wealth held in housing
    housing_age_pc = (sum(housing_assets_tot_real) + sum(housing_debt_tot_real) ) / sum(total_wealth_tot_real) )
data.table(wealth_older)[year %in% c(2018, 2048)] 


## *** CHART IN EXEC SUMMARY figure 5a
wealth_older_arrows <- data.frame(
  type=c("Population", "Wealth"),
  arrow_x = c(2031, 2031),
  arrow_y = c(data.table(wealth_older)[older==1 & year==2018, n_age_pc]*100,
              data.table(wealth_older)[older==1 & year==2018, wealth_age_pc]*100), 
  arrow_yend = c(data.table(wealth_older)[older==1 & year==2048, n_age_pc]*100,
                 data.table(wealth_older)[older==1 & year==2048, wealth_age_pc]*100), 
  text_x = c(2008, 2008),
  text_y = c(40, 55),
  text_label = c("+12 ppt", "+22 ppt")
)

wealth_older_plot <- ggplot(wealth_older %>% 
                              filter(older == 1 & year %in% c(2018, 2048)) %>% 
                              pivot_longer(cols=c(wealth_age_pc, n_age_pc), names_to="type", values_to="share") %>% 
                              mutate(type = ifelse(type=="n_age_pc", "Population", "Wealth"))) +
  geom_col(aes(x = year, y=share*100)) +
  facet_wrap(vars(type)) +
  scale_y_continuous(limits=c(0,80), expand=c(0,0)) +
  scale_x_continuous(breaks=c(2018, 2048)) +
  ylab("Percentage") + xlab("Year") +
  custom_plot_margin +
  ## custom arrows/text
  geom_segment(data=wealth_older_arrows,
               aes(x=arrow_x, xend=arrow_x, y=arrow_y, yend=arrow_yend),
               colour= pc_grey,
               size=0.05,
               arrow=arrow(length = unit(0.2, "cm"), type="closed")
  ) +
  geom_text(data=wealth_older_arrows,
            aes(label = text_label, x=text_x, y=text_y ), size=2.9, hjust=0)

emf(file=paste0("./Charts/wealth_older_", Sys.Date(), ".emf"),
    width = 7.4/2.54, height = 6.5/2.54,
    pointsize=12,
    family="Arial")
wealth_older_plot
dev.off()

ggsave(file=paste0("./Charts/wealth_older_", Sys.Date(), ".svg"), 
                   plot=wealth_older_plot,
                   width = 7.4/2.54, height = 6.5/2.54)

## CHART IN EXEC SUMMARY figure 5b
housing_older_plot <- ggplot(wealth_older %>% 
                               filter(year %in% c(2018, 2048)) %>% 
                               mutate(older = ifelse(older==1, "60\u201399", "30\u201359"))) +
  geom_col(aes(x = as.factor(older), y=housing_age_pc*100)) +
  facet_wrap(vars(year)) +
  scale_y_continuous(limits=c(0,80), expand=c(0,0)) +
  ylab("Percentage") + xlab("Age") +
  custom_plot_margin

emf(file=paste0("./Charts/housing_older_", Sys.Date(), ".emf"),
    width = 7.4/2.54, height = 6.5/2.54,
    pointsize=12,
    family="Arial")
housing_older_plot
dev.off()

ggsave(file=paste0("./Charts/housing_older_", Sys.Date(), ".svg"), 
                   plot=housing_older_plot,
                   width = 7.4/2.54, height = 6.5/2.54)

       
## *** USED IN CHAPTER
## HILDA data shows that people aged 60 and over accounted for about 28 per cent of the 30–99 year old population 
## and 31 per cent of their wealth in 2002, with shares of wealth rising faster in the decades since
wealth_older_hilda <- hilda_intergen_wealth_dist %>% 
  filter(age_grp>="[30,35)" & age_grp<="[95,100)") %>% 
  mutate(older = ifelse(age_grp>="[60,65)", 1, 0)) %>% 
  group_by(year, older) %>% 
  summarise(wealth_age_pc = sum(age_pc),
            n_age_pc = sum(n_pc)) %>% 
  filter(older==1)
wealth_older_hilda


## *** USED IN CHAPTER
## People within the 55–74 age range are projected to hold 0.15 percentage points more wealth among the 30–99 year old 
## population in 2048 than they would in a hypothetical world without wealth transfers between generations
diff_intergen_wealth_dist <- intergen_wealth_dist %>% 
  pivot_wider(names_from="wealth_type", values_from="age_pc") %>% 
  mutate(diff = total_wealth_w_transfers_real - total_wealth_no_transfers_real)

diff_intergen_wealth_dist %>% 
  filter(year==2048) %>% 
  mutate(age5574 = ifelse(age_grp>="[55,60)" & age_grp<="[70,75)", 1, 0)) %>% 
  group_by(age5574) %>% 
  summarise(sumdiff = sum(diff))


## *** Chart in chapter (figure 3.5)
## Absolute difference in average wealth with and without transfers - including bequests only and gifts only
## Note, bequests only + gifts only doesn't equal total with transfers due to interaction effects (eg if you get a bequest, can give more gifts)
diff_intergen_wealth_dist_abs <- left_join(sim_results_year_age %>% 
                                             select(year, age_grp, total_wealth_w_transfers_real=total_wealth_av_real),
                                           sim_results_year_age_no_transfers %>% 
                                             select(year, age_grp, total_wealth_no_transfers_real=total_wealth_av_real) ) %>% 
  left_join(sim_results_year_age_bequests %>% 
              select(year, age_grp, total_wealth_w_bequests_real=total_wealth_av_real)) %>% 
  left_join(sim_results_year_age_gifts %>% 
              select(year, age_grp, total_wealth_w_gifts_real=total_wealth_av_real)) %>% 
  filter(year %in% c(2048) & age_grp >= "[30,35)" & age_grp <= "[95,100)") %>% 
  mutate(`Inheritances and gifts  ` = total_wealth_w_transfers_real - total_wealth_no_transfers_real,
         `Inheritances only` = total_wealth_w_bequests_real - total_wealth_no_transfers_real,
         `Gifts only` = total_wealth_w_gifts_real - total_wealth_no_transfers_real) %>% 
  pivot_longer(cols = matches("(Inheritances|only)"), names_to = "diff_type", values_to = "diff") %>% 
  mutate(diff_type = factor(diff_type, levels= c("Inheritances and gifts  ", "Inheritances only", "Gifts only")))

diff_intergen_wealth_dist_abs_plot <- ggplot(diff_intergen_wealth_dist_abs) +
  geom_col(aes(x = as.numeric(age_grp), y=diff/1000, fill=diff_type), position="dodge") +
  scale_fill_pc() +
  ylab("Change in average wealth held\n($ thousand)") +
  xlab("Age group") +
  scale_x_continuous(labels = unique(diff_intergen_wealth_dist_abs$age_grp)[seq(1, 14, 2)] %>% age_grp_labeller,
                     breaks = seq(7, 20, 2)) +
  scale_y_continuous(breaks = seq(-20, 40, 10) ) +
  custom_plot_margin

emf(file=paste0("./Charts/diff_intergen_wealth_dist_abs_plot_", Sys.Date(), ".emf"), 
    width = 15/2.54, height = 8/2.54,
    pointsize=12,
    family="Arial")
diff_intergen_wealth_dist_abs_plot
dev.off()

ggsave(file=paste0("./Charts/diff_intergen_wealth_dist_abs_plot_", Sys.Date(), ".svg"), 
                   plot=diff_intergen_wealth_dist_abs_plot,
                   width = 15/2.54, height = 8/2.54)


# Wealth composition by age -----------------------------------------------

## total wealth by wealth type
## *** USED in CHAPTER 
tot_wealth_by_type_age_year10_data <- sim_results_year_age %>% 
  filter(year %in% c(2018, 2048)) %>% 
  mutate(housing_wealth_tot_real = housing_assets_tot_real + housing_debt_tot_real) %>% 
  select(year, age_grp, housing_wealth_tot_real, super_assets_tot_real, other_assets_tot_real, total_wealth_tot_real) %>% 
  pivot_longer(cols=c(housing_wealth_tot_real, super_assets_tot_real, other_assets_tot_real), 
               names_to = "wealth_type", values_to = "wealth_value") %>% 
  mutate(wealth_type = factor(wealth_type, 
                              levels = c("housing_wealth_tot_real", "super_assets_tot_real", "other_assets_tot_real"),
                              labels = c("Housing", "Superannuation", "Other assets"))) 

## shares of wealth
share_wealth_by_type_age_year10_data <-  tot_wealth_by_type_age_year10_data %>% 
  group_by(year, age_grp) %>% 
  mutate(wealth_share = wealth_value /sum(wealth_value)) 

## *** CHART USED IN CHAPTER (figure 3.4)
share_wealth_by_type_age_year10_point <- share_wealth_by_type_age_year10_data %>% 
  filter(age_grp>="[30,35)" & age_grp<="[95,100)") %>% 
  mutate(wealth_type = factor(wealth_type, labels=c("Housing", "Super", "Other assets"))) %>% 
  group_by(age_grp, wealth_type) %>% 
  mutate(wealth_share_2048 = wealth_share[2],
         increase = ifelse(wealth_share_2048 - wealth_share>0, "Increase", "Decrease") %>% factor(levels=c("Increase", "Decrease"))) %>% 
  ggplot() +
  geom_segment(aes(x=age_grp, y=wealth_share*100, xend=age_grp, yend=wealth_share_2048*100,
                   colour=wealth_type, linetype=increase), 
               size=0.4) +
  geom_point(aes(x=age_grp, y=wealth_share*100, colour=wealth_type, 
                 shape=as.factor(year), size=as.factor(year))) +
  scale_x_discrete(breaks=unique(sim_results_year_age$age_grp)[seq(1, 21, 2)],
                   labels = unique(sim_results_year_age$age_grp)[seq(1, 21, 2)] %>% age_grp_labeller()) +
  scale_colour_pc() +
  scale_shape_manual(values = c(1,16)) +
  scale_size_manual(values=c(2,3)) +
  ylab("Percentage of total wealth") +
  xlab("Age group") +
  custom_plot_margin +
  guides(colour=guide_legend(order=1, override.aes=list(size=2)),
         shape=guide_legend(order=2, override.aes=list(colour="black")),
         size=guide_legend(order=2)) +
  theme(legend.spacing.x=unit(0.3, "lines"),
        legend.margin=margin(0,0,0,0)) 


emf(file=paste0("./Charts/share_wealth_by_type_age_year10_point", Sys.Date(), ".emf"), 
    width = 15/2.54, height = 7.5/2.54,
    pointsize=12,
    family="Arial")
share_wealth_by_type_age_year10_point
dev.off()

ggsave(file=paste0("./Charts/share_wealth_by_type_age_year10_point", Sys.Date(), ".svg"), 
       plot=share_wealth_by_type_age_year10_point,
       width = 15/2.54, height = 7.5/2.54)



# Wealth inequality (Gini) model population ---------------------------------------------------------

## no transfers
gini_no_transfers <- sim_results_no_transfers_tidy %>% 
  filter(year %in% seq(2018, 2050, 5)) %>% 
  ## filter age groups as well as some disappear over time
  filter(age_grp>="[30,35)" & age_grp<="[95,100)") %>% 
  split(., .$year) %>% 
  ## for each year, calc the Gini coefficient for that pop
  lapply(., function(x) {
    data.table(
      year=x[[1, "year"]],
      gini_no_transfers=gini(x$total_wealth, weights=x$n)
    )
  }) %>% 
  rbindlist

## w transfers
gini_w_transfers <- sim_results_w_transfers_tidy %>% 
  filter(year %in% seq(2018, 2050, 5)) %>% 
  ## filter age groups as well as some disappear over time
  filter(age_grp>="[30,35)" & age_grp<="[95,100)") %>% 
  split(., .$year) %>% 
  ## for each year, calc the Gini coefficient for that pop
  lapply(., function(x) {
    data.table(
      year=x[[1, "year"]],
      gini_w_transfers=gini(x$total_wealth, weights=x$n)
    )
  }) %>% 
  rbindlist

gini_all <- left_join(gini_no_transfers, gini_w_transfers) %>% 
  filter(year %in% seq(2018,2050,5)) %>%
  ## calc difference in gini coefficient
  mutate(diff = gini_no_transfers - gini_w_transfers) %>% 
  pivot_longer(cols=contains("gini"), names_to="gini_type", values_to="gini_value")

## *** USED IN CHAPTER 
## Across the 30–99 year old population in 2048
## The Gini coefficient was also lower for the same population (43.6 without transfers compared with 43.1 with transfers).
gini_all
data.table(gini_all)[year==2048 & gini_type=="gini_no_transfers", "gini_value"] 
data.table(gini_all)[year==2048 & gini_type=="gini_w_transfers", "gini_value"]

gini_fall_w_transfers <- data.table(gini_all)[year==2048 & gini_type=="gini_no_transfers", "gini_value"] - data.table(gini_all)[year==2048 & gini_type=="gini_w_transfers", "gini_value"]


## COMPARE WITH HOUSING counterfactual SCENARIO
gini_housing_cf <- sim_results_housing_cf_tidy %>% 
  filter(year %in% seq(2018, 2050, 5)) %>% 
  ## filter age groups as well as some disappear over time
  filter(age_grp>="[30,35)" & age_grp<="[95,100)") %>%
  split(., .$year) %>% 
  ## for each year, calc the Gini coefficient for that pop
  lapply(., function(x) {
    data.table(
      year=x[[1, "year"]],
      gini_housing_cf=gini(x$total_wealth, weights=x$n)
    )
  }) %>% 
  rbindlist

## *** USed in chapter
## The wealth Gini coefficient in the housing counterfactual scenario was about 2 percentage points 
## higher than in the base scenario for the 30–99 year old age range in 2048 
## more than three times the size of the difference in the Gini coefficient attributed to future wealth transfers in the base scenario.
gini_housing_cf_compare <- left_join(gini_housing_cf, gini_w_transfers) %>% 
  filter(year %in% seq(2018,2050,5)) %>% 
  ## calc difference in gini coefficient
  mutate(diff = gini_housing_cf-gini_w_transfers) %>% 
  pivot_longer(cols=contains("gini"), names_to="gini_type", values_to="gini_value")
data.table(gini_housing_cf_compare)[year==2048 & gini_type=="gini_housing_cf", "gini_value"] - data.table(gini_housing_cf_compare)[year==2048 & gini_type=="gini_w_transfers", "gini_value"]

gini_fall_w_lower_housing_return <- data.table(gini_housing_cf_compare)[year==2048 & gini_type=="gini_housing_cf", "gini_value"] - data.table(gini_housing_cf_compare)[year==2048 & gini_type=="gini_w_transfers", "gini_value"]
gini_fall_w_lower_housing_return/gini_fall_w_transfers



# Wealth inequality (Gini) by generation ------------------------------------------------------

## no transfers Gini
gini_gen0_no_transfers <- sim_results_no_transfers_tidy %>% 
  filter(year %in% seq(2020, 2050, 5)) %>% 
  ## filter to Millenials and Gen X for chart
  filter(gen0!="Post-millennials" & gen0!="Pre-boomers" & gen0!="Boomers") %>% 
  filter(age_grp<="[95,100)") %>% ## won't change anything when limited to Millenials and Gen X
  group_by(year, gen0) %>% 
  group_split %>% 
  ## for each year, calc the Gini coefficient for that pop
  lapply(., function(x) {
    data.table(
      year= x[[1, "year"]],
      gen0= x[[1, "gen0"]],
      gini_no_transfers= gini(x$total_wealth, weights=x$n)
    )
  }) %>% 
  rbindlist

gini_gen0_w_transfers <- sim_results_w_transfers_tidy %>% 
  filter(year %in% seq(2020, 2050, 5)) %>% 
  ## filter to Millenials and Gen X for chart
  filter(gen0!="Post-millennials" & gen0!="Pre-boomers" & gen0!="Boomers") %>% 
  filter(age_grp<="[95,100)") %>% ## won't change anything when limited to Millenials and Gen X
  group_by(year, gen0) %>% 
  group_split %>% 
  ## for each year, calc the Gini coefficient for that pop
  lapply(., function(x) {
    data.table(
      year= x[[1, "year"]],
      gen0= x[[1, "gen0"]],
      gini_w_transfers= gini(x$total_wealth, weights=x$n)
    )
  }) %>% 
  rbindlist

gini_gen0_all <- left_join(gini_gen0_no_transfers, gini_gen0_w_transfers) %>% 
  filter(year %in% seq(2020,2050,5)) %>% ## display every 5 years due to model process spikiness
  mutate(diff = gini_no_transfers-gini_w_transfers) %>% 
  pivot_longer(cols=contains("gini"), names_to="gini_type", values_to="gini_value")


## low housing comparison
gini_gen0_housing_cf <- sim_results_housing_cf_tidy %>% 
  filter(year %in% seq(2020, 2050, 5)) %>% 
  ## filter to Millennials and Gen X
  filter(gen0!="Post-millennials" & gen0!="Pre-boomers" & gen0!="Boomers") %>% 
  filter(age_grp<="[95,100)") %>% 
  group_by(year, gen0) %>% 
  group_split %>% 
  ## for each year, calc the Gini coefficient for that pop
  lapply(., function(x) {
    data.table(
      year= x[[1, "year"]],
      gen0= x[[1, "gen0"]],
      gini_housing_cf = gini(x$total_wealth, weights=x$n)
    )
  }) %>% 
  rbindlist


gini_gen0_housing_cf_compare <- left_join(gini_gen0_housing_cf, gini_gen0_w_transfers) %>% 
  filter(year %in% seq(2020,2050,5)) %>% ## display every 5 years due to spikiness
  mutate(diff = gini_housing_cf-gini_w_transfers) %>% 
  pivot_longer(cols=contains("gini"), names_to="gini_type", values_to="gini_value")


## *** CHART USED IN chapter (figure 3.6)
gini_gen0_compare <- left_join(gini_gen0_all %>% filter(gini_type=="gini_w_transfers") %>% rename(transfers_diff=diff) %>% select(year, gen0, transfers_diff),
                               gini_gen0_housing_cf_compare %>% filter(gini_type=="gini_w_transfers") %>% rename(housing_diff=diff) %>% select(year, gen0, housing_diff)) %>% 
  pivot_longer(cols=contains("diff"), names_to="comparison", values_to="gini_reduction") %>% 
  mutate(comparison = factor(comparison,
                             levels = c("transfers_diff", "housing_diff"),
                             labels = c("Wealth transfers", "Lower housing return")))

gini_gen0_compare_plot <- ggplot(gini_gen0_compare ) +
  geom_line(aes(x=year, y=gini_reduction*100, colour=comparison)) +
  facet_wrap(vars(gen0)) +
  scale_y_continuous(breaks=seq(0, 1.2, 0.3), expand=c(0,0)) +
  scale_colour_pc() +
  ylab("Reduction in Gini coefficient") +
  xlab("Year") +
  theme(panel.spacing = unit(2, "lines")) +
  custom_plot_margin +
  ## Arrows and labels
  geom_segment(aes(x=2022, xend=2022, y=1.1, yend=1.3),
               colour= pc_grey,
               size=0.02,
               arrow=arrow(length = unit(0.2, "cm"), type="closed")
  ) +
  geom_text(aes(label = "More equal", x=2023.5, y=1.2), size=2.9, hjust=0)


emf(file=paste0("./Charts/gini_gen0_compare_plot_", Sys.Date(), ".emf"), 
    width = 15/2.54, height = 7.5/2.54,
    pointsize=12,
    family="Arial")
gini_gen0_compare_plot
dev.off()

ggsave(file=paste0("./Charts/gini_gen0_compare_plot_", Sys.Date(), ".svg"), 
       plot=gini_gen0_compare_plot,
       width = 15/2.54, height = 7.5/2.54)

## Chart used in exec summary - less detail
emf(file=paste0("./Charts/gini_gen0_compare_plot_simple_", Sys.Date(), ".emf"), 
    width = 15/2.54, height = 7.5/2.54,
    pointsize=12,
    family="Arial")
gini_gen0_compare_plot +
  ylab("Effect on relative wealth inequality") +
  theme(axis.text.y=element_blank(),
        axis.ticks.y=element_blank())
dev.off()

ggsave(file=paste0("./Charts/gini_gen0_compare_plot_simple_", Sys.Date(), ".svg"), 
       plot=gini_gen0_compare_plot +
         ylab("Effect on relative wealth inequality") +
         theme(axis.text.y=element_blank(),
               axis.ticks.y=element_blank()),
       width = 15/2.54, height = 7.5/2.54)


# Relative size of inheritance (to wealth and income) --------------------------------------------

## *** USED IN chapter
## The ratio of the size of the inheritances received relative to existing wealth is also projected to increase. For example, 
## for people aged 62–66 (the age group at which total received inheritances peaks), the ratio increases from 0.16 to 0.78 from 2020 to 2050. 
data.table(sim_results_year_age)[year %in% c(2020, 2050) & age_grp=="[60,65)", beqrec_wealth_share]

## *** USED IN chapter

## Share of lifetime model income - use the 30yr lifetime income for 50-75 yos as at 2050, and assume ppl in same age at previous years had same average
model_lifeinc_2050 <- sim_results_year_age %>% 
  ungroup() %>% 
  filter(year==2050) %>% 
  select(age_grp, av_model_lifetime_inc_2018) %>% 
  rename(model_lifetime_inc_2050 = av_model_lifetime_inc_2018)

## Average estimated cumulative income for the 62–66 year age group over the 32 years is $1.7 million, or about $54 000 per year.
data.table(model_lifeinc_2050)[age_grp=="[60,65)"]
data.table(model_lifeinc_2050)[age_grp=="[60,65)", model_lifetime_inc_2050] / 32

## A similar story is found when examining the ratio of received inheritances to cumulative model income of the 2050 cohort.  
## This ratio increases from 0.08 to 0.43 for people aged 62–66.
beqrec_share_lifeinc_age <- sim_results_year_age %>% 
  filter(year %in% c(seq(2020, 2050, 10))) %>% 
  filter(age_grp>="[50,55)" & age_grp<="[70,75)" ) %>% 
  left_join(model_lifeinc_2050) %>% 
  mutate(beqrec_model_inc_share_2050 = bequest_received_av_real / model_lifetime_inc_2050) %>% 
  select(year, age_grp, beqrec_model_inc_share_2050)
data.table(beqrec_share_lifeinc_age)[year %in% c(2020, 2050) & age_grp=="[60,65)", beqrec_model_inc_share_2050]


## *** USED IN chapter
## In 2048, inheritance recipients aged 60–64 who are in the bottom third of the wealth distribution for their age group 
## are projected to receive an average of $230 000 in the base scenario model, which is about 69 per cent of their wealth before the inheritance.
## Those in the top third of the wealth distribution are projected to receive an average of $929 000  — while this is larger in absolute terms,
## it is about 60 per cent of their existing wealth.
#View(sim_results_year_age_wlth)
data.table(sim_results_year_age_wlth)[year==2048 & age_grp=="[60,65)", c("year", "age_grp", "age_wlth_grp3", "beqrec_wealth_share", "bequest_received_av_real")]


# Effect of number of inheritance recipients to deaths ---------------------------------------------------------

## *** USED IN CHAPTER
## number of receivers vs parents dying
## There is also a rise in the average size of received inheritances, driven by a 50 per cent fall 
## in the ratio of inheritance recipients to deaths (from about 2:1 to 1:1) over the projection period. 

implied_kids <- sim_results_w_transfers_tidy %>% 
  group_by(year) %>% 
  summarise( deaths = sum(end_year_deaths),
             bequest_recips = sum(prev_parent_deaths_add)   ) %>% 
  mutate( n_beqrec_death_ratio = bequest_recips/lag(deaths, 1)) ## relative to prev year deaths because beqs received in the year after

ggplot(implied_kids ) +
  geom_line(aes(x = year, y=n_beqrec_death_ratio)) +
  scale_y_continuous(limits=c(0,2.5))


## HILDA children:
hilda_grouped <- qread("./Input data/Intermediate input data/hilda_grouped_master.qs")

children <- hilda_grouped %>% 
  mutate(children = case_when(
    tcr>=0 & tcnr>=0 ~ tcr + tcnr
  )) %>% 
  filter(!is.na(children)) %>% 
  group_by(wavenumber, age_grp, children) %>% 
  summarise(n = sum(hhwtrp))

## number of children you have
## *** USED IN CHAPTER 
## Analysis of 2018 HILDA data shows that about 55 per cent of people aged 80–84 had more than two children, 
## whereas it was only 35 per cent for people aged 50–54. 
children_over2 <- children %>% 
  filter(wavenumber==18) %>% 
  mutate(children_over2 = ifelse(children>2, 1, 0)) %>% 
  group_by(age_grp, children_over2) %>% 
  summarise(n=sum(n)) %>% 
  group_by(age_grp) %>% 
  mutate(n_share = n/sum(n))
data.table(children_over2)[age_grp=="[80,85)",]
data.table(children_over2)[age_grp=="[50,55)",]




# Misc in text values used in chapter ------------------------------------------------------------

## cf low housing - average wealth of 60-65s
## when housing grows at its historical rate of 7 per cent in the long run rather than 4 per cent, 
## average wealth accumulated over the model span by people aged 60–64 nearly doubled ($1.8 million by 2048 compared with $1 million in constant dollars).
data.table(sim_results_year_age)[age_grp=="[60,65)" & year==2048, "total_wealth_av_real"]
data.table(sim_results_historic_housing_year_age)[age_grp=="[60,65)" & year==2048, "total_wealth_av_real"]


## total wealth at death and intergenerational bequests
## Total wealth at death was projected to increase by a factor of 3.3 between 2020 and 2050, 
# with the total amount passed onto the next generation increasing by a factor of 3.9. 
# The larger factor increase in intergenerational inheritances is because older age groups make up a larger share of total deaths in 2050 
# and are less likely to have a surviving spouse. 
data.table(sim_results_year)[year==2050, "total_wealth_at_death_tot_real"] / data.table(sim_results_year)[year==2020, "total_wealth_at_death_tot_real"]
data.table(sim_results_year)[year==2050, "bequest_given_intergen_tot_real"] / data.table(sim_results_year)[year==2020, "bequest_given_intergen_tot_real"]

deaths_by_age <- sim_results_year_age %>%
  filter(year %in% c(2020, 2050) & age_grp>="[30,35)") %>%
  select(year, age_grp, total_wealth_at_death_tot_real, bequest_given_intergen_tot_real, deaths) %>%
  group_by( age_grp) %>%
  mutate(increase_wealth_at_death = total_wealth_at_death_tot_real[2]/total_wealth_at_death_tot_real[1],
         increase_bequest = bequest_given_intergen_tot_real[2] / bequest_given_intergen_tot_real[1]) %>% 
  group_by(year) %>% 
  mutate(death_pc = deaths/sum(deaths))


## total wealth at death decomposition 2020 and 2050
## - how much of the increase is due to death numbers and how much is due to increase in average wealth.
## Decomposing the increase in total wealth at death (into deaths and average wealth) shows that it is largely driven 
## by an increase in the number of deaths, which was projected to double. 
wealth_death_decomp <- sim_results_year %>% 
  filter(year %in% c(2020, 2050)) %>% 
  select(year, deaths, total_wealth_at_death_tot_real, total_wealth_at_death_av_real) %>% 
  mutate(tot_increase_factor = total_wealth_at_death_tot_real[2]/total_wealth_at_death_tot_real[1],
         death_increase_factor = deaths[2]/deaths[1], ## increase in deaths leads to a doubling of the amount of wealth at death
         av_increase_factor = total_wealth_at_death_av_real[2] / total_wealth_at_death_av_real[1] ) ## increase in av wealth at death is 4 times 
wealth_death_decomp


## The size of gifts fell from about 23 per cent of the size of intergenerational inheritances in the 2019 base scenario projections to only 6 per cent in 2050.
## The total size of gifts given was only 6 per cent of the size of intergenerational inheritances in the 2050 base scenario projections.
data.table(sim_results_year)[year==2019, "gift_given_tot_real"] / data.table(sim_results_year)[year==2019, "bequest_given_intergen_tot_real"]
data.table(sim_results_year)[year==2050, "gift_given_tot_real"] / data.table(sim_results_year)[year==2050, "bequest_given_intergen_tot_real"]


## Home ownership rate with and without transfers
##  81 per cent of people aged 55–59 are homeowners in 2048 in the model with future wealth transfers, 
## marginally higher than the 79 per cent in the model without transfers.
sim_results_w_transfers_tidy %>% 
  filter(age_grp=="[55,60)" & year %in% c(2018, 2048)) %>% 
  group_by(year) %>% 
  summarise(ho_rate = sum(ho*n)/sum(n)) 

sim_results_no_transfers_tidy %>% 
  filter(age_grp=="[55,60)" & year %in% c(2018, 2048)) %>% 
  group_by(year) %>% 
  summarise(ho_rate = sum(ho*n)/sum(n)) 


## INTERQUARTILE RANGE
## The range of wealth for people aged 30–99 at the 25th and 75th percentiles is projected to be $673 000 (2018 dollars) in 2048. 
## In a world without future wealth transfers between generations, the interquartile range in 2048 is marginally lower at about $660 000. 
sim_results_box_2048_w_transfers <- sim_results_box %>% filter(year==2048 & wealth_type=="total_wealth_w_transfers_real" )

sim_results_box_2048_w_transfers_qtile <- wtd.quantile(sim_results_box_2048_w_transfers$value,
                                                       weights = sim_results_box_2048_w_transfers$n,
                                                       probs = c(0.25, 0.5, 0.75))
sim_results_box_2048_w_transfers_qtile[3]-sim_results_box_2048_w_transfers_qtile[1]

sim_results_box_2048_no_transfers <- sim_results_box %>% filter(year==2048 & wealth_type=="total_wealth_no_transfers_real" )
sim_results_box_2048_no_transfers_qtile <- wtd.quantile(sim_results_box_2048_no_transfers$value,
                                                        weights = sim_results_box_2048_no_transfers$n,
                                                        probs = c(0.25, 0.5, 0.75))
sim_results_box_2048_no_transfers_qtile[3]-sim_results_box_2048_no_transfers_qtile[1]



## 80:20 ratio
## The ratio of wealth held by the top 20th percentile to that held by the bottom 20th percentile — the 80:20 ratio — 
## was lower for people aged 30–99 in 2048 after accounting for wealth transfers (4.67 without transfers compared with 4.59 with transfers), 
## indicating improved relative wealth equality. 
sim_results_box_2048_no_transfers_8020 <- wtd.quantile(sim_results_box_2048_no_transfers$value,
                                                       weights = sim_results_box_2048_no_transfers$n,
                                                       probs = c(0.20, 0.80))
sim_results_box_2048_no_transfers_8020[2]/sim_results_box_2048_no_transfers_8020[1]

sim_results_box_2048_w_transfers_8020 <- wtd.quantile(sim_results_box_2048_w_transfers$value,
                                                      weights = sim_results_box_2048_w_transfers$n,
                                                      probs = c(0.20, 0.80))
sim_results_box_2048_w_transfers_8020[2]/sim_results_box_2048_w_transfers_8020[1]



## distribution of inheritances received
hilda_grouped <- qread("./Input data/Intermediate input data/hilda_grouped_master.qs")

hilda_bequest_dist <- hilda_grouped %>% 
  ## bequest recips only
  filter(bequests>0) %>% 
  ## by wave, what inheritance quintile are you in?
  split(., .$wavenumber) %>% 
  ## for each wave, create inheritance quintile variables
  lapply(., 
         function(x) {
           x %>% 
             mutate(  
               inheritance_qtile = cut(bequests, 
                                       ## cut points determined by weighted quantile (only works if cut points are unique)
                                       breaks = Hmisc::wtd.quantile(.$bequests, 
                                                                    weights = .$hhwtrp, 
                                                                    probs = seq(0, 1, 1/5)), 
                                       include.lowest=T,
                                       labels = c(1:5),
                                       ordered_result=T))
         }
  ) %>% 
  rbindlist %>% 
  ## summary stats by quintile
  group_by(wavenumber, inheritance_qtile) %>% 
  summarise(sample = n(),
            n = sum(hhwtrp),
            bequest_av = sum(bequests*hhwtrp)/n,
            bequest_tot = sum(bequests*hhwtrp)) %>% 
  group_by(wavenumber) %>% 
  mutate(bequest_cumu = cumsum(bequest_tot),
         bequest_cumu_share = bequest_cumu/bequest_cumu[5]*100) 

hilda_bequest_dist_plot <- ggplot(hilda_bequest_dist %>% filter(wavenumber>=10)) +
  geom_line(aes(x=as.integer(inheritance_qtile), y=bequest_cumu_share, colour=as.factor(wavenumber)) ) +
  geom_col(aes(x=as.integer(inheritance_qtile), y=bequest_av/6000, fill=as.factor(wavenumber)), position="dodge" ) +
  scale_y_continuous(sec.axis = sec_axis( ~ . *6000 , name="Average inheritance ($)")) +
  ylab("Cumulative inheritances received (%)") +
  xlab("Recipient's inheritance quintile")
## Specifically, they projected that over three quarters of the total wealth transferred in the future would accrue to 
## 20 per cent of inheritance recipients — broadly in line with the share of total inheritances that went to the 
## top 20 per cent of inheritance recipients over the past 10 years (Commission estimates using HILDA Restricted Release 19)”




# APPENDIX CHARTS AND VALUES ----------------------------------------------



# Projected population ----------------------------------------------------

## *** chart in appendix figure C.4
## AREA chart of n by gen0
n_year_gen0_area <- ggplot(sim_results_year_gen0) +
  geom_area(aes(x=year, y=n_tot/1000000, fill=gen0)) +
  scale_fill_pc(labels = unique(sim_results_year_gen0$gen0) %>% paste0("  ")) +
  ylab("Projected model population (million)") +
  xlab("Year") +
  scale_x_continuous(breaks = seq(2020,2050,5), expand=c(0,0)) +
  scale_y_continuous(expand = c(0,0), limits=c(0, 25), sec.axis = dup_axis(name="") ) +
  custom_plot_margin

emf(file=paste0("./Charts/projected_n_", Sys.Date(), ".emf"), 
    width = 15/2.54, height = 7.5/2.54,
    pointsize=12,
    family="Arial")
n_year_gen0_area
dev.off()

ggsave(file=paste0("./Charts/projected_n_", Sys.Date(), ".svg"), 
       plot=n_year_gen0_area, 
       width = 15/2.54, height = 7.5/2.54)


## *** Chart IN APPENDIX figure C.5
## number of people by age and inc dist
inc_age_year10_col <- sim_results_year_age_inc %>% 
  filter(year %in% c(2018, 2048) & age_grp>="[30,35)") %>% 
  mutate(year = ifelse(year==2048, "2048 (projected)", as.character(year))) %>% 
  ggplot +
  geom_col(aes(x = age_grp, y=n_tot/1000, fill=total_inc_qtile)) +
  facet_wrap(vars(year)) +
  scale_x_discrete(breaks=unique(sim_results_year_age_inc$age_grp)[seq(1, 21, 3)],
                   labels = unique(sim_results_year_age$age_grp)[seq(1, 21, 3)] %>% age_grp_labeller()) +
  scale_y_continuous(expand = c(0,0), limits=c(0, 1900)) +
  scale_fill_pc(labels=c("Q1", "Q2", "Q3", "Q4", "Q5")) +
  ylab("Population ('000)") +
  xlab("Age group") +
  theme(panel.spacing = unit(2, "lines")) +
  custom_plot_margin

emf(file=paste0("./Charts/projected_n_age_inc_", Sys.Date(), ".emf"), 
    width = 15/2.54, height = 7.5/2.54,
    pointsize=12,
    family="Arial")
inc_age_year10_col
dev.off()

ggsave(file=paste0("./Charts/projected_n_age_inc_", Sys.Date(), ".svg"), 
       plot=inc_age_year10_col, 
       width = 15/2.54, height = 7.5/2.54)

## *** Chart IN APPENDIX figure C.6
## homeownership rates
ho_rates <- sim_results_year_age_ho %>% 
  filter(year %in% c(seq(2018, 2050, 10)) & age_grp!="[100,105]") %>% 
  group_by(year, age_grp) %>% 
  summarise(ho_rate = sum(ho*n_tot)/sum(n_tot)) %>% 
  ggplot +
  geom_col(aes(x=age_grp, y=ho_rate*100, fill=as.factor(year)), position="dodge") +
  scale_x_discrete(breaks=unique(sim_results_year_age_inc$age_grp)[seq(1, 21, 2)],
                   labels= unique(sim_results_year_age_inc$age_grp)[seq(1, 21, 2)] %>% age_grp_labeller) +
  scale_y_continuous(expand=c(0,0)) +
  ylab("Home ownership rate (%)") +
  xlab("Age group") +
  scale_fill_pc() +
  custom_plot_margin

emf(file=paste0("./Charts/ho_rates_10year_col_", Sys.Date(), ".emf"), 
    width = 15/2.54, height = 8/2.54,
    pointsize=12,
    family="Arial")
ho_rates
dev.off()

ggsave(file=paste0("./Charts/ho_rates_10year_col_", Sys.Date(), ".svg"), 
       plot=ho_rates, 
       width = 15/2.54, height = 8/2.54)

### Chart IN APPENDIX figure C.7
## Bequest received rates 
beqrec_rates <- sim_results_year_age_beqrec %>% 
  filter(year %in% c(seq(2018, 2050, 10))) %>% 
  group_by(year, age_grp) %>% 
  summarise(beqrec_rate = sum(beqrec*n_tot)/sum(n_tot)) %>% 
  ggplot +
  geom_col(aes(x=age_grp, y=beqrec_rate*100, fill=as.factor(year)), position="dodge") +
  scale_x_discrete(breaks=unique(sim_results_year_age_inc$age_grp)[seq(1, 21, 2)],
                   labels= unique(sim_results_year_age_inc$age_grp)[seq(1, 21, 2)] %>% age_grp_labeller) +
  scale_y_continuous(expand=c(0,0)) +
  ylab("Share of population who have\nreceived an inheritance (%)") +
  xlab("Age group") +
  scale_fill_pc() +
  custom_plot_margin

emf(file=paste0("./Charts/beqrec_rates_10year_col_", Sys.Date(), ".emf"), 
    width = 15/2.54, height = 8/2.54,
    pointsize=12,
    family="Arial")
beqrec_rates
dev.off()

ggsave(file=paste0("./Charts/beqrec_rates_10year_col_", Sys.Date(), ".svg"), 
       plot=beqrec_rates, 
       width = 15/2.54, height = 8/2.54)